The team has categorised our features into these 3 broad categories: Sentiment, Technical, General.
Twint can bypass this API, but when the team tried using it, we found limitations and instability in scraping tweets with the package, so we will not proceed with it. Given:
Added:
A paper uses the following 3 below: Link
options(scipen = 99)
library(dplyr)
library(tidyverse)
library(stringr)
library(almanac)
library(lubridate)
library(Quandl)
library(gtrendsR)
library(RcppRoll)
library(xgboost)
library(MLmetrics)
library(pageviews)
library(quantmod)
library("doFuture"); registerDoFuture(); plan(multiprocess)
library(tidyquant)
library(tidymodels)
library(tsfeatures)
library(slider)
library(timetk)
library(data.table)
library(grid)
library(plotly) #for candlestick plot
library(hablar) #for bollinger band analysis
library(skimr)
# Load the daily tweet counts and merge them into the model table.
twitter_volume <- read.csv("data/twitter_volume.csv")
# Rename by position BEFORE touching the date column: the raw header of the
# first column depends on how the file's byte-order mark is decoded
# (e.g. "ï..date" on some setups), so it must not be referenced by name.
colnames(twitter_volume) <- c("date", "tweets_volume")
twitter_volume$date <- as.Date(twitter_volume$date)
bitcoin_model <- bitcoin_model %>% left_join(twitter_volume)
## Joining, by = "date"
# Replace the missing values before Dec 2013 with 0
bitcoin_model$tweets_volume[is.na(bitcoin_model$tweets_volume)] <- 0
# Tweet volume as bars with the close price overlaid. close is multiplied by
# 50 to share the primary axis; the secondary axis divides by 50 again.
ggplot(bitcoin_model) +
  geom_bar(aes(x = date, y = tweets_volume), stat = "identity",
           fill = "tan1", colour = "sienna3") +
  geom_line(aes(x = date, y = close * 50), stat = "identity") +
  scale_y_continuous(sec.axis = sec_axis(~ . / 50, name = "bitcoin_price"))
# Bollinger Band features: band width and %B, joined onto the model table.
bitcoin_price_data <- bitcoin_price %>%
  convert(num(open:weighted_price))
# dn - lower Bollinger band, mavg - middle moving average, up - upper band
bbands <- BBands(bitcoin_price_data[, c("high", "low", "close")])
# Attach the first three band columns to the price table and keep rows from
# date_band onwards.
date_band <- "2011-09-13"
df_1 <- cbind(bitcoin_price, data.frame(bbands[, 1:3]))
df_1 <- subset(df_1, date >= date_band)
row.names(df_1) <- NULL
df_1 <- df_1 %>%
  mutate(
    bb_width = (up - dn) / mavg,
    bb_percent_b = (close - dn) / (up - dn)
  )
bb_df <- df_1 %>% select(date, bb_width, bb_percent_b)
bitcoin_model <- left_join(bitcoin_model, bb_df)
## Joining, by = "date"
# Window used by the charts that follow.
df_2 <- subset(df_1, date >= "2018-01-01" & date <= "2020-11-30")
# Direction label used to colour the charts: a day counts as "Increasing"
# when it closes at or above its open, otherwise "Decreasing".
# Vectorised so that an empty df_2 yields an empty column instead of the
# 1:0 iteration the index loop would perform.
df_2$direction <- ifelse(df_2$close >= df_2$open, "Increasing", "Decreasing")
# Line colours handed to the candlestick trace for up and down days.
i <- list(line = list(color = "#008000"))
d <- list(line = list(color = "#FF0000"))
# Candlestick chart of df_2 with the upper band, lower band and moving
# average drawn as thin lines on top.
fig2 <- df_2 %>%
  plot_ly(
    x = ~date, type = "candlestick",
    open = ~open, close = ~close,
    high = ~high, low = ~low, name = "BTC",
    increasing = i, decreasing = d
  ) %>%
  add_lines(
    x = ~date, y = ~up, name = "B Bands",
    line = list(color = "#000080", width = 0.5),
    legendgroup = "Bollinger Bands",
    hoverinfo = "none", inherit = FALSE
  ) %>%
  add_lines(
    x = ~date, y = ~dn, name = "B Bands",
    line = list(color = "#000080", width = 0.5),
    legendgroup = "Bollinger Bands", inherit = FALSE,
    showlegend = FALSE, hoverinfo = "none"
  ) %>%
  add_lines(
    x = ~date, y = ~mavg, name = "Mv Avg",
    line = list(color = "#800000", width = 0.5),
    hoverinfo = "none", inherit = FALSE
  ) %>%
  layout(yaxis = list(title = "Price"))
# Volume bar chart, coloured by the day's direction.
# The colours are named so each direction gets its intended colour (green for
# "Increasing", red for "Decreasing", matching the candlestick colours);
# an unnamed vector is assigned to the sorted levels, which would give
# "Decreasing" the first (green) colour.
fig3 <- df_2 %>%
  plot_ly(
    x = ~date, y = ~volume_currency, type = "bar", name = "BTC Volume",
    color = ~direction,
    colors = c(Increasing = "#008000", Decreasing = "#FF0000")
  ) %>%
  layout(yaxis = list(title = "Volume"))
# Range-selector configuration passed to the x axis of the combined figure:
# a RESET button plus three look-back buttons (1 year, 3 months, 1 month).
rs <- local({
  # One look-back button; every such button steps backward from the range end.
  look_back <- function(count, label, step) {
    list(count = count, label = label, step = step, stepmode = "backward")
  }
  list(
    visible = TRUE, x = 0.5, y = -0.055,
    xanchor = "center", yref = "paper",
    font = list(size = 9),
    buttons = list(
      list(count = 1, label = "RESET", step = "all"),
      look_back(1, "1 YR", "year"),
      look_back(3, "3 MO", "month"),
      look_back(1, "1 MO", "month")
    )
  )
})
# Stack the candlestick chart over the volume chart on a shared x axis, then
# add the title, the range selector and a horizontal legend.
fig2 <- subplot(
  fig2, fig3,
  nrows = 2, heights = c(0.7, 0.2),
  shareX = TRUE, titleY = TRUE
) %>%
  layout(
    title = paste("BTC: ", "2018-01-01", " -", "2020-11-30"),
    xaxis = list(rangeselector = rs),
    legend = list(
      orientation = "h", x = 0.5, y = 1,
      xanchor = "center", yref = "paper",
      font = list(size = 10),
      bgcolor = "transparent"
    )
  )
fig2
# Closing price (top) and Bollinger band width (bottom) over the same window,
# stacked with aligned panel widths.
plot1 <- ggplot(df_2, aes(x = date, y = close, colour = "Bitcoin Closing Price")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "darkred")
plot2 <- ggplot(df_2, aes(x = date, y = bb_width, colour = "BOLL BW")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "steelblue")
grid.newpage()
grid.draw(rbind(ggplotGrob(plot1), ggplotGrob(plot2), size = "last"))
# RSI of the close price with a window of 14, keyed by date and joined onto
# the model table.
bitcoin_model_date <- as.data.frame(bitcoin_model$date)
rsi <- setNames(
  cbind(bitcoin_model_date, as.data.frame(RSI(bitcoin_model$close, 14))),
  c("date", "rsi")
)
bitcoin_model <- left_join(bitcoin_model, rsi)
## Joining, by = "date"
# Closing price (top) and RSI (bottom), stacked with aligned panel widths.
plot3 <- ggplot(bitcoin_model, aes(x = date, y = close, colour = "Bitcoin Closing Price")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "darkred")
plot4 <- ggplot(bitcoin_model, aes(x = date, y = rsi, colour = "RSI")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "steelblue")
grid.newpage()
grid.draw(rbind(ggplotGrob(plot3), ggplotGrob(plot4), size = "last"))
# Alternative view: close price and RSI in a single panel. RSI is multiplied
# by 100 for the primary axis; the secondary axis divides by 100 again.
rsi_vs_close <- ggplot(bitcoin_model, aes(x = date)) +
  geom_line(aes(y = close, colour = "Bitcoin Closing Price")) +
  geom_line(aes(y = rsi * 100, colour = "Relative Strength Index")) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 100, name = "Title")) +
  scale_colour_manual(values = c("darkred", "steelblue"))
rsi_vs_close
We will look at RSI together with the Bollinger band width: RSI tells the direction, bb_width tells the intensity. E.g. when the band width is becoming narrower, the price can be either at the top or at the bottom of the Bollinger band, so to determine which, look at RSI. RSI increasing from below 30 means the price is at the bottom of the band and hence could be a possible buy.
# Combined with bb_width
# e.g. RSI going down --> towards oversold band width increases a lot too, may be an indication to buy
# e.g. RSI going up --> bullish, already gaining momentum, bandwidth increasing too, may also be an indication to buy, so depends on the risk appetite of investor when to enter.
# Three stacked panels: close price, RSI, then Bollinger band width.
plot2 <- ggplot(df_1, aes(x = date, y = bb_width, colour = "Bollinger Band Width")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "steelblue")
plot3 <- ggplot(bitcoin_model, aes(x = date, y = close, colour = "Bitcoin Closing Price")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "darkred")
plot4 <- ggplot(bitcoin_model, aes(x = date, y = rsi, colour = "RSI")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "steelblue")
grid.newpage()
grid.draw(rbind(ggplotGrob(plot3), ggplotGrob(plot4), ggplotGrob(plot2), size = "last"))
# Usually, K = 9 days, S = 12 days and L = 26 days
#' MACD line and signal line for a price series.
#'
#' @param x Table whose first column is used as the date column.
#' @param price Price series the averages are computed on.
#' @param S Window of the short EMA.
#' @param L Window of the long EMA.
#' @param K Window of the EMA applied to the MACD line to get the signal.
#' @return The column-bound date, price, MACD and signal, named
#'   "date", "closing_price", "MACD" and "signal".
myMACD <- function(x, price, S, L, K) {
  macd_line <- EMA(price, S) - EMA(price, L)
  signal_line <- EMA(macd_line, K)
  result <- cbind(x[, 1], price, macd_line, signal_line)
  colnames(result) <- c("date", "closing_price", "MACD", "signal")
  return(result)
}
# MACD on the close price with S = 12, L = 26, K = 9.
macd <- myMACD(bitcoin_price, Cl(bitcoin_price), S = 12, L = 26, K = 9)
tail(macd, n = 5)
## date closing_price MACD signal
## 3367 2020-11-30 19700.19 1023.825 1097.046
## 3368 2020-12-01 18771.43 1004.070 1078.451
## 3369 2020-12-02 19226.55 1013.456 1065.452
## 3370 2020-12-03 19448.40 1026.958 1057.753
## 3371 2020-12-04 19437.67 1024.977 1051.198
# The model feature is the distance between the MACD line and its signal.
macd <- mutate(macd, macd_signal_dist = MACD - signal)
macd_df <- select(macd, date, macd_signal_dist)
bitcoin_model <- left_join(bitcoin_model, macd_df)
## Joining, by = "date"
# Bars are coloured by whether the MACD line is above its signal line.
macd$direction <- ifelse(macd$macd_signal_dist > 0, "Increasing", "Decreasing")
# MACD line, dashed signal line and their difference as bars.
# The legend label states the parameters actually passed to myMACD
# (12, 26, 9); it previously read "(12,6,9)" and misspelled "Convergence".
macd_chart <- ggplot(macd, aes(x = date)) +
  geom_line(aes(y = MACD, colour = "Moving Average Convergence Divergence (12,26,9)")) +
  geom_line(aes(y = signal, colour = "Signal"), linetype = "dashed") +
  geom_bar(aes(y = macd_signal_dist, fill = direction), stat = "identity") +
  scale_fill_manual(values = c("Increasing" = "#008000", "Decreasing" = "#FF0000")) +
  scale_colour_manual(values = c("darkred", "steelblue")) +
  scale_x_date(limits = as.Date(c("2020-01-01", "2020-06-30")))
macd_chart
# Closing price panel drawn above the MACD chart.
plot5 <- ggplot(macd, aes(x = date)) +
  geom_line(aes(y = closing_price, colour = "Bitcoin Closing Price")) +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = c("steelblue"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plot5), ggplotGrob(macd_chart), size = "last"))
# Accumulation/distribution: per-day money-flow volume
# ((close - low) - (high - close)) / (high - low) * volume, then its running
# sum. Missing results (e.g. when high equals low) count as 0.
acu_dis_df <- bitcoin_price %>%
  mutate(
    cmfv = ((close - low) - (high - close)) / (high - low) * volume_currency,
    cmfv = replace(cmfv, is.na(cmfv), 0),
    cumulative_cmfv = cumsum(cmfv)
  )
acu_dis_df_add <- select(acu_dis_df, date, cumulative_cmfv)
bitcoin_model <- left_join(bitcoin_model, acu_dis_df_add)
## Joining, by = "date"
# Cumulative money-flow volume against the close price. close is multiplied
# by 1,000,000 for the primary axis; the secondary axis divides it back.
ggplot(acu_dis_df, aes(x = date)) +
  geom_line(aes(y = cumulative_cmfv), stat = "identity", colour = "sienna3") +
  geom_line(aes(y = close * 1000000), stat = "identity") +
  scale_y_continuous(sec.axis = sec_axis(~ . / 1000000, name = "bitcoin_price")) +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  )
EMA: More weightage placed on recent price levels rather than past ones compared to using SMA
# 12-days EMA, 24-days EMA, 72-days EMA
# One-column date table, reused by the later indicator sections.
bitcoin_date <- as.data.frame(bitcoin_price$date)
colnames(bitcoin_date) <- c("date")
range_ema <- c(12, 24, 72)
# Pass the window length itself to EMA(). Iterating over seq_along() and
# using the index as `n` would compute 1-, 2- and 3-day averages under the
# ema_12 / ema_24 / ema_72 names.
bitcoin_ema <- lapply(range_ema, function(n_days) {
  EMA(Cl(bitcoin_price), n = n_days)
})
bitcoin_ema <- as.data.frame(do.call(cbind, bitcoin_ema))
names_ema <- paste0("ema_", range_ema)
colnames(bitcoin_ema) <- names_ema
ema <- bitcoin_date %>% cbind(bitcoin_ema)
bitcoin_model <- bitcoin_model %>% left_join(ema)
## Joining, by = "date"
# 12-days SMA, 24-days SMA, 72-days SMA
range_sma <- c(12, 24, 72)
# Pass the window length itself to SMA(); using the loop index as `n` would
# compute 1-, 2- and 3-day averages under the sma_12 / sma_24 / sma_72 names.
bitcoin_sma <- lapply(range_sma, function(n_days) {
  SMA(Cl(bitcoin_price), n = n_days)
})
bitcoin_sma <- as.data.frame(do.call(cbind, bitcoin_sma))
names_sma <- paste0("sma_", range_sma)
colnames(bitcoin_sma) <- names_sma
sma <- bitcoin_date %>% cbind(bitcoin_sma)
bitcoin_model <- bitcoin_model %>% left_join(sma)
## Joining, by = "date"
# Assumptions:
# short term: 14,28
# longer term: 90,180,200
range_roc <- c(14, 28, 90, 180, 200)
# Pass the look-back length itself to ROC(); using the loop index as `n`
# would compute 1- to 5-day rates of change under the roc_14 ... roc_200 names.
bitcoin_roc <- lapply(range_roc, function(n_days) {
  ROC(Cl(bitcoin_price), n = n_days)
})
bitcoin_roc <- as.data.frame(do.call(cbind, bitcoin_roc))
names_roc <- paste0("roc_", range_roc)
colnames(bitcoin_roc) <- names_roc
bitcoin_roc <- bitcoin_date %>% cbind(bitcoin_roc)
bitcoin_model <- bitcoin_model %>% left_join(bitcoin_roc)
## Joining, by = "date"
ROC states that whenever the Rate of Change crosses above the zero line from below, it indicates positive momentum, while when the ROC crosses below the zero line from above, it indicates negative momentum in the price.
# Three stacked panels comparing the roc_14, roc_90 and roc_200 columns over
# the first half of 2020.
plota <- ggplot(bitcoin_model, aes(x = date, y = roc_14, colour = "ROC_14days")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2020-01-01", "2020-06-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "steelblue")
plotb <- ggplot(bitcoin_model, aes(x = date, y = roc_90, colour = "ROC_90days")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2020-01-01", "2020-06-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "darkred")
plotc <- ggplot(bitcoin_model, aes(x = date, y = roc_200, colour = "ROC_200days")) +
  geom_line() +
  scale_x_date(
    limits = as.Date(c("2020-01-01", "2020-06-30")),
    labels = date_format("%b-%Y")
  ) +
  scale_colour_manual(values = "green")
grid.newpage()
grid.draw(rbind(ggplotGrob(plota), ggplotGrob(plotb), ggplotGrob(plotc), size = "last"))
# On-balance volume from close price and traded volume, keyed by date and
# joined onto the model table.
bitcoin_obv <- setNames(
  cbind(
    bitcoin_date,
    as.data.frame(OBV(bitcoin_price$close, bitcoin_price$volume_currency))
  ),
  c("date", "on_balance_vol")
)
bitcoin_model <- left_join(bitcoin_model, bitcoin_obv)
## Joining, by = "date"
# Data about bitcoin activity, transaction fees, and mining.
print("Downloading bitcoin indicators.")
## [1] "Downloading bitcoin indicators."
# Each entry is c(dataset code, descriptive name). The download loop below
# passes both elements of every entry to quandl_tidy().
code_list <- list(c("BCHAIN/TOTBC", "Total Bitcoins"),
c("BCHAIN/MKTCP", "Bitcoin Market Capitalization"),
c("BCHAIN/NADDU", "Bitcoin Number of Unique Addresses Used"),
c("BCHAIN/ETRAV", "Bitcoin Estimated Transaction Volume BTC"),
c("BCHAIN/ETRVU", "Bitcoin Estimated Transaction Volume USD"),
c("BCHAIN/TRVOU", "Bitcoin USD Exchange Trade Volume"),
c("BCHAIN/NTRAN", "Bitcoin Number of Transactions"),
c("BCHAIN/NTRAT", "Bitcoin Total Number of Transactions"),
c("BCHAIN/NTREP", "Bitcoin Number of Transactions Excluding Popular Addresses"),
c("BCHAIN/NTRBL", "Bitcoin Number of Tansaction per Block"),
c("BCHAIN/ATRCT", "Bitcoin Median Transaction Confirmation Time"),
c("BCHAIN/TRFEE", "Bitcoin Total Transaction Fees"),
c("BCHAIN/TRFUS", "Bitcoin Total Transaction Fees USD"),
c("BCHAIN/CPTRA", "Bitcoin Cost Per Transaction"),
c("BCHAIN/CPTRV", "Bitcoin Cost % of Transaction Volume"),
c("BCHAIN/BLCHS", "Bitcoin api.blockchain Size"),
c("BCHAIN/AVBLS", "Bitcoin Average Block Size"),
c("BCHAIN/TOUTV", "Bitcoin Total Output Volume"),
c("BCHAIN/HRATE", "Bitcoin Hash Rate"),
c("BCHAIN/MIREV", "Bitcoin Miners Revenue"),
c("BCHAIN/BCDDE", "Bitcoin Days Destroyed"),
c("BCHAIN/BCDDW", "Bitcoin Days Destroyed Minimum Age 1 Week"),
c("BCHAIN/BCDDM", "Bitcoin Days Destroyed Minimum Age 1 Month"),
c("BCHAIN/BCDDY", "Bitcoin Days Destroyed Minimum Age 1 Year") ,
c("BCHAIN/BCDDC", "Bitcoin Days Destroyed Cumulative"))
# Download every dataset in code_list in order, announcing each one, and
# stack the results into a single table.
# bitcoin_data <- foreach(i=seq_along(code_list), .combine='bind_rows') %dopar% {quandl_tidy(code_list[[i]][1], code_list[[i]][2])}
bitcoin_data <- bind_rows(
  tibble(),
  map(code_list, function(entry) {
    print(str_c("Downloading data for ", entry[1], "."))
    quandl_tidy(entry[1], entry[2])
  })
)
## [1] "Downloading data for BCHAIN/TOTBC."
## [1] "Downloading data for BCHAIN/MKTCP."
## [1] "Downloading data for BCHAIN/NADDU."
## [1] "Downloading data for BCHAIN/ETRAV."
## [1] "Downloading data for BCHAIN/ETRVU."
## [1] "Downloading data for BCHAIN/TRVOU."
## [1] "Downloading data for BCHAIN/NTRAN."
## [1] "Downloading data for BCHAIN/NTRAT."
## [1] "Downloading data for BCHAIN/NTREP."
## [1] "Downloading data for BCHAIN/NTRBL."
## [1] "Downloading data for BCHAIN/ATRCT."
## [1] "Downloading data for BCHAIN/TRFEE."
## [1] "Downloading data for BCHAIN/TRFUS."
## [1] "Downloading data for BCHAIN/CPTRA."
## [1] "Downloading data for BCHAIN/CPTRV."
## [1] "Downloading data for BCHAIN/BLCHS."
## [1] "Downloading data for BCHAIN/AVBLS."
## [1] "Downloading data for BCHAIN/TOUTV."
## [1] "Downloading data for BCHAIN/HRATE."
## [1] "Downloading data for BCHAIN/MIREV."
## [1] "Downloading data for BCHAIN/BCDDE."
## [1] "Downloading data for BCHAIN/BCDDW."
## [1] "Downloading data for BCHAIN/BCDDM."
## [1] "Downloading data for BCHAIN/BCDDY."
## [1] "Downloading data for BCHAIN/BCDDC."
# Clean Bitcoin Indicators
# Reshape to one column per dataset code, then make the column names
# syntactic and lower-case (e.g. "bchain.mktcp").
bitcoin_data <- bitcoin_data %>%
  select(date, value, code) %>%
  spread(code, value)
names(bitcoin_data) <- tolower(make.names(names(bitcoin_data)))
# NVT: market capitalisation divided by estimated USD transaction volume.
bitcoin_data <- bitcoin_data %>%
  mutate(nvt = bchain.mktcp / bchain.etrvu)
bitcoin_data_nvt <- bitcoin_data %>% select(date, nvt)
bitcoin_model <- left_join(bitcoin_model, bitcoin_data_nvt)
## Joining, by = "date"
# Average true range over a 14-day window; only the date and atr columns of
# the ATR() output are kept for the model.
atr <- cbind(
  bitcoin_date,
  ATR(bitcoin_price[, c("high", "low", "close")], n = 14)
) %>%
  select(date, atr)
bitcoin_model <- left_join(bitcoin_model, atr)
## Joining, by = "date"
# Scrape Google Data
# Month start dates to download: from 2011 when download_all is TRUE,
# otherwise from 2017; months after today are dropped.
download_all <- FALSE
dates <- tibble(
  dates = ymd(if (download_all) "2011-01-01" else "2017-01-01") + months(0:120)
) %>%
  filter(dates <= Sys.Date())
#' Google Trends interest over time for one search term.
#'
#' @param query Search keyword handed to gtrends(). It was previously
#'   ignored in favour of a hard-coded 'bitcoin'; every call in this file
#'   passes "bitcoin", so results are unchanged.
#' @param begin_date Start of the period (anything str_c() can format).
#' @param end_date End of the period.
#' @return A tibble with `date` (as Date) and `hits`.
google_trends <- function(query, begin_date, end_date) {
  df <- gtrends(
    keyword = query,
    time = str_c(begin_date, " ", end_date)
  )[["interest_over_time"]] %>%
    select(date, hits) %>%
    mutate(date = as.Date(date)) %>%
    as_tibble()
  return(df)
}
# Download Google Trends Daily Data
# One CSV per month goes into the relative "google-trend-daily" directory,
# the same directory the rebase step reads back with list.files(), instead
# of a user-specific absolute path.
dir.create("google-trend-daily", showWarnings = FALSE)
for (i in seq_len(nrow(dates))) {
  begin_date <- as.Date(dates[["dates"]][i])
  # Last day of the month, capped at today. min() keeps the Date class,
  # whereas ifelse() returns a bare number that as.Date() cannot convert
  # without an origin on older R versions.
  end_date <- min(begin_date + months(1) - days(1), Sys.Date())
  print(str_c("Downloading Google Trends data from ", begin_date, " to ", end_date, "."))
  df <- google_trends("bitcoin", begin_date, end_date)
  write_csv(
    df,
    file.path(
      "google-trend-daily",
      str_c("google-trends-daily-", begin_date, "-", end_date, ".csv")
    )
  )
}
# Download Google Trends Monthly Data
# "<1" entries are replaced with "1" so that the later numeric conversion
# does not turn them into NA.
monthly <- google_trends("bitcoin", "2011-01-01", Sys.Date()) %>%
  rename(hits_monthly = hits) %>%
  mutate(hits_monthly = replace(hits_monthly, hits_monthly == "<1", "1"))
# Rebase
# Read every daily CSV, attach the monthly series (carried forward over the
# days without a monthly row) and scale daily hits by monthly hits / 100.
bitcoin_google <- list.files("google-trend-daily") %>%
  map_df(function(csv_name) {
    read_csv(file.path("google-trend-daily", csv_name), col_types = "Di")
  }) %>%
  rename(hits_daily = hits) %>%
  left_join(monthly) %>%
  fill(hits_monthly) %>%
  mutate(hits_monthly = as.numeric(hits_monthly)) %>%
  mutate(hits_daily = hits_daily * hits_monthly / 100)
The 3 chunks above show how the Google Trends data were obtained. Since we have already run this previously, we will now just load the data that we saved as CSV and add it to the model below.
# Load the saved Google Trends table and parse its date column.
bitcoin_google <- read.csv("data/bitcoin_google.csv") %>%
  mutate(date = as.Date(date))
# add to model
bitcoin_model <- left_join(bitcoin_model, bitcoin_google)
## Joining, by = "date"
# Add the remaining blockchain indicators; nvt was already joined earlier.
bitcoin_data_df <- bitcoin_data %>% select(-nvt)
bitcoin_model <- left_join(bitcoin_model, bitcoin_data_df)
## Joining, by = "date"
# Drawdown of the close relative to its running maximum (0 at a new high,
# negative below it).
bitcoin_model <- bitcoin_model %>%
  mutate(close_drawdown = -(1 - close / cummax(close)))
#' Download a ticker's price history from Yahoo as a tibble.
#'
#' @param ticker Symbol passed to getSymbols(), e.g. '^VIX'.
#' @return A tibble with columns open, high, low, close, volume,
#'   adjusted_close, date and ticker.
get_yahoo <- function(ticker) {
  prices <- getSymbols(ticker, src = "yahoo", auto.assign = FALSE, from = "1900-01-01")
  # The ticker column is added explicitly so the table has eight columns to
  # match the eight names assigned below; without it the price columns plus
  # `date` are one short of the name vector.
  df <- prices %>%
    as_tibble() %>%
    mutate(date = index(prices), ticker = ticker)
  colnames(df) <- c("open", "high", "low", "close", "volume", "adjusted_close", "date", "ticker")
  return(df)
}
# VIX adjusted close, joined by date.
vix <- select(get_yahoo("^VIX"), date, vix = adjusted_close)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
bitcoin_model <- left_join(bitcoin_model, vix)
## Joining, by = "date"
# Days without a VIX value take the most recent earlier value.
bitcoin_model <- fill(bitcoin_model, vix, .direction = "down")
# load the csv first
# "." marks a missing rate in the file; reading it as NA lets the rate column
# parse as numeric directly, rather than converting a text (or, on R < 4,
# factor) column afterwards.
fedfundsrate <- read.csv("data/fedfundsrate.csv", na.strings = c("NA", "."))
colnames(fedfundsrate) <- c("date", "effr")
fedfundsrate$date <- as.Date(fedfundsrate$date)
fedfundsrate$effr <- as.numeric(fedfundsrate$effr)
fedfundsrate <- subset(fedfundsrate, date >= as.Date("2011-09-13"))
# Carry the last observed rate forward. fill() keeps every row, so a missing
# value at the very start stays NA instead of shortening the column.
fedfundsrate <- fedfundsrate %>% fill(effr, .direction = "down")
bitcoin_model <- bitcoin_model %>% left_join(fedfundsrate)
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>% fill(effr, .direction = "down")
# S&P 500 adjusted close, joined by date.
sp500 <- select(get_yahoo("^GSPC"), date, sp500 = adjusted_close)
bitcoin_model <- left_join(bitcoin_model, sp500)
## Joining, by = "date"
# fill na downwards for weekend missing values - fill with fri closing price
bitcoin_model <- fill(bitcoin_model, sp500, .direction = "down")
# load the csv first
xau_usd_historical <- read.csv("data/xau_usd_historical.csv")
# Name the first column by position: its raw header depends on how the
# file's byte-order mark is decoded (e.g. "ï..Date" on some setups), so it
# must not be referenced by that name.
names(xau_usd_historical)[1] <- "Date"
xau_usd_historical$Date <- as.Date(xau_usd_historical$Date, format = "%B %d, %Y")
xau_usd_df <- select(xau_usd_historical, c(Date, Price))
colnames(xau_usd_df) <- c("date", "gold_price")
bitcoin_model <- bitcoin_model %>% left_join(xau_usd_df)
## Joining, by = "date"
# Days without a gold price take the most recent earlier value.
bitcoin_model <- bitcoin_model %>% fill(gold_price, .direction = "down")
# Bitcoin close against the gold price. gold_price is multiplied by 10 for
# the primary axis; the secondary axis divides it back.
# scale_x_date() is chained with `+` so the date limits are applied to the
# plot; as a separate statement it only printed the scale object.
ggplot(bitcoin_model) +
  geom_line(aes(x = date, y = close), stat = "identity", colour = "sienna3") +
  geom_line(aes(x = date, y = gold_price * 10), stat = "identity") +
  scale_y_continuous(sec.axis = sec_axis(~ . / 10, name = "gold_price")) +
  scale_x_date(
    limits = as.Date(c("2018-01-01", "2020-11-30")),
    labels = date_format("%b-%Y")
  )
The OFR Financial Stress Index (OFR FSI) is a daily market-based snapshot of stress in global financial markets. It is constructed from 33 financial market variables, such as yield spreads, valuation measures, and interest rates. The OFR FSI is positive when stress levels are above average, and negative when stress levels are below average.
The OFR FSI incorporates five categories of indicators: credit, equity valuation, funding, safe assets and volatility. The FSI shows stress contributions by three regions: United States, other advanced economies, and emerging markets. The value of the OFR FSI on a given day is the weighted average level of each variable observed in the market on that day, relative to its history. The index is zero when this average is zero, suggesting that stress is at normal levels. The index is calculated after each U.S. trading day.
# load the csv first
fsi <- read.csv("data/fsi.csv")
# Keep the date and the index value, and parse the date.
fsi_df <- fsi %>%
  select(date = "Date", ofr_fsi = "OFR.FSI") %>%
  mutate(date = as.Date(date))
bitcoin_model <- left_join(bitcoin_model, fsi_df)
## Joining, by = "date"
#weekend missing, fill by fri's numbers
bitcoin_model <- fill(bitcoin_model, ofr_fsi, .direction = "down")
The AAII Investor Sentiment Survey measures the percentage of individual investors who are bullish, bearish, and neutral on the stock market for the next six months; individuals are polled from the ranks of the AAII membership on a weekly basis. Only one vote per member is accepted in each weekly voting period.
Since this AAII sentiment (comprises bullish, neutral and bearish sentiment) is for stock market, we take the assumption that bitcoin is seen as a “safe haven” compared to stock, so we will take the bearish sentiment.
This is a weekly number, so we will fill every day with that week’s number.
# Register the Quandl API key from the QUANDL_API_KEY environment variable so
# the credential is not stored in the source file. Without a key the
# requests are sent unauthenticated.
quandl_key <- Sys.getenv("QUANDL_API_KEY")
if (nzchar(quandl_key)) {
  Quandl.api_key(quandl_key)
} else {
  warning("QUANDL_API_KEY is not set; Quandl requests will be unauthenticated.",
          call. = FALSE)
}
# AAII sentiment survey, oldest first; only the bearish share is used.
aaii_sentiment <- Quandl("AAII/AAII_SENTIMENT") %>%
  arrange(Date) %>%
  as_tibble()
aaii_sentiment_df <- aaii_sentiment %>%
  select(date = "Date", aaii_bearish_sentiment = "Bearish")
bitcoin_model <- bitcoin_model %>% left_join(aaii_sentiment_df)
## Joining, by = "date"
# NOTE(review): .direction = "up" copies each survey value onto the days
# BEFORE its date, unlike the "down" fills used for the other series in this
# file - confirm this is intended and does not use values not yet published.
bitcoin_model <- bitcoin_model %>% fill(aaii_bearish_sentiment, .direction = "up")
# USD/CNY adjusted close, joined by date.
cny_usd <- select(get_yahoo("USDCNY=X"), date, cny_usd_close = adjusted_close)
bitcoin_model <- left_join(bitcoin_model, cny_usd)
## Joining, by = "date"
# fill na downwards for weekend missing values - fill with fri closing price
bitcoin_model <- fill(bitcoin_model, cny_usd_close, .direction = "down")
# load in the data first
us_dollar_index_historical_data <- read.csv("data/us_dollar_index_historical_data.csv")
# Name the first column by position: its raw header depends on how the
# file's byte-order mark is decoded (e.g. "ï..Date" on some setups), so it
# must not be referenced by that name.
names(us_dollar_index_historical_data)[1] <- "Date"
us_dollar_index_historical_data$Date <- as.Date(
  us_dollar_index_historical_data$Date,
  format = "%B %d, %Y"
)
us_dollar_index <- select(us_dollar_index_historical_data, c(Date, Price))
colnames(us_dollar_index) <- c("date", "dollar_index_close")
bitcoin_model <- bitcoin_model %>% left_join(us_dollar_index)
## Joining, by = "date"
# Days without an index value take the most recent earlier value.
bitcoin_model <- bitcoin_model %>% fill(dollar_index_close, .direction = "down")
Another way to get a comparison of how any particular crypto is moving relative to the crypto market is to build an index to compare it to. An index will also be able to show trends and changes in investing patterns.
We have created a simple crypto index based on weighted market cap of top 10 crypto. (Market capitalization-weighted). Using an index, an investor can see a mood of the market and can make informed decisions.
Firstly, Asset selection:
Secondly, Allocation Distribution: Research paper on root-cap weight
There are a few ways we can do this: Market cap Weighted, Square Root Market Cap Weighted (root-cap weight), Evenly Weighted, Minimum Weight, Maximum Weight.
Other indexes in the market have bitcoin making up close to 70% of the index weight, since it is the largest by market cap. The components with a higher market cap carry a higher weighting percentage in the index, which makes sense since it provides steady growth for the index and reduces risk if the smaller cryptocurrencies do not perform well.
However, we will attempt to look into creating a square root market cap weighted with the intention to just have better diversification of the weights of the 10 crypto in this index to see how it will perform.
Weightage: | Cryptocurrency | Weights (%) | | ———– | ———– | | BTC | 28.4 | | XRP | 18.1 | | ETH | 16.2 | | BCH | 12.0 | | ADA | 8.16 | | LTC | 6.64 | | XLM | 5.50 | | USDT | 2.21 | | BNB | 1.72 | | BNB | 0.95 |
Further improvement in future could be looking at setting a min weight for those lower rank, or setting a max weight to Bitcoin. Further research into each of the crypto is still necessary to see how we can better improve the weightage of each in the index.
Currently, this is just a simple index created to serve as a benchmark for comparison.
Our index will assume a base of 100.
top10_crypto <- read.csv("data/top10_crypto.csv")
top10_crypto$ï..date <- as.Date(top10_crypto$ï..date, format="%B %d, %Y")
colnames(top10_crypto) <- c("date", "open", "high", "low", "close", "volume", "market_cap", "symbol")
mkt_cap_total <- top10_crypto %>%
group_by(date) %>%
summarise(sqrt_sum_mkt_cap = sum(sqrt(market_cap)))
## `summarise()` ungrouping output (override with `.groups` argument)
top10_crypto_df <- top10_crypto %>% left_join(mkt_cap_total)
## Joining, by = "date"
top10_crypto_df <- top10_crypto_df %>% mutate(weightage = sqrt(market_cap)/sqrt_sum_mkt_cap)
top10_crypto_df <- top10_crypto_df %>% group_by(date)
# Let the base of this index be 100: the divisor is chosen so that the index
# equals 100 on the base date, 2018-01-01.
divisor <- subset(
  mkt_cap_total, date == as.Date("2018-01-01")
)$sqrt_sum_mkt_cap / 100
# Fail with a clear message if the base date is absent or its value is
# unusable, instead of an obscure size error (or an Inf/NaN index) further on.
stopifnot(length(divisor) == 1L, is.finite(divisor), divisor > 0)

mkt_cap_total <- mkt_cap_total %>%
  mutate(crypto_index = sqrt_sum_mkt_cap / divisor)

# Daily log return of the index, appended as `crypto_index_return`.
crypto_index <- mkt_cap_total %>%
  tq_mutate(
    select = crypto_index,
    mutate_fun = periodReturn,
    period = "daily",
    type = "log",
    col_rename = "crypto_index_return"
  )
# Add the index level and its daily return to the modelling table.
crypto_index_df <- select(crypto_index, date, crypto_index, crypto_index_return)
# Explicit key: a natural join would match on every shared column name, which
# changes meaning if `bitcoin_model` already carries the index columns
# (e.g. when this chunk is re-run in the same session).
bitcoin_model <- bitcoin_model %>%
  left_join(crypto_index_df, by = "date")

# Keep only the modelling window, 2018-01-01 to 2020-11-30 inclusive.
bitcoin_model <- subset(
  bitcoin_model,
  date >= as.Date("2018-01-01") & date <= as.Date("2020-11-30")
)

# NOTE(review): this table is reassigned further down from `bitcoin_model`
# (the select that drops the bchain.* columns) before it is used, so this
# assignment appears to have no effect on the final feature table — confirm
# whether the target columns were meant to be dropped there instead.
bitcoin_features <- select(
  bitcoin_model,
  -c(close, future_return, future_return_sign)
)
# Level of our crypto index over the modelling window.
bitcoin_model %>%
  ggplot(mapping = aes(x = date)) +
  geom_line(mapping = aes(y = crypto_index), colour = "red", alpha = 0.8)

# Bitcoin `future_return` (blue) overlaid with the index return (red).
bitcoin_model %>%
  ggplot(mapping = aes(x = date)) +
  geom_line(mapping = aes(y = future_return), colour = "blue", alpha = 0.8) +
  geom_line(mapping = aes(y = crypto_index_return), colour = "red", alpha = 0.6)
Comparing to the Bitwise Select 10 Large Cap Crypto Index
# Load the benchmark index series (the Bitwise index named in the heading
# above). Its dates are parsed with "%B %d %Y" (full month name, day, year);
# %B matching depends on the current locale.
index_bitx <- read.csv("data/index_bitx.csv")
index_bitx$date <- as.Date(index_bitx$date, format = "%B %d %Y")

# Line the benchmark up against our own index, one row per date of our index.
# The key is explicit so the match cannot change if the CSV gains columns whose
# names collide with ours.
multiple_crypto_indexes_2 <- crypto_index_df %>%
  left_join(index_bitx, by = "date")
# Overlay our index with the benchmark. `bitx` is divided by 200 so both lines
# fit on the primary axis; the secondary axis multiplies by 200 to show the
# benchmark on its original scale.
# The secondary axis previously carried the placeholder name "Title".
# NOTE(review): the legend label "KEYS:SW" is kept as-is — confirm it is the
# intended label for the `bitx` series.
ggplot(multiple_crypto_indexes_2, aes(x = date)) +
  geom_line(aes(y = crypto_index, colour = "Our Crypto Index")) +
  geom_line(aes(y = bitx / 200, colour = "KEYS:SW")) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . * 200, name = "Bitwise 10 index (bitx)")
  )
The following indicators have had no values since 2017, and there are no alternative sites that provide this data, so we will leave them out:
This feature has an alternative site that we can take the data from:
# Drop the indicators discussed above: the bchain.bcdd* series are left out
# entirely, and bchain.atrct is removed here so that it can be re-added below
# from the alternative source.
bitcoin_features <- select(
  bitcoin_model,
  -c(
    bchain.bcddc, bchain.bcdde, bchain.bcddm, bchain.bcddw, bchain.bcddy,
    bchain.atrct
  )
)

# bchain.atrct - Bitcoin Median Transaction Confirmation Time
# Load the replacement series. Columns are named positionally BEFORE parsing
# the date, because the first header was being read with a byte-order-mark
# prefix (`ï..date`) whose exact spelling depends on platform/encoding.
median_confirmation_time_bitcoin <- read.csv("data/median-confirmation-time-bitcoin.csv")
colnames(median_confirmation_time_bitcoin) <- c("date", "bchain.atrct")
median_confirmation_time_bitcoin$date <- as.Date(
  median_confirmation_time_bitcoin$date,
  format = "%d/%m/%Y"
)

# Attach the replacement column by date (explicit key instead of a natural
# join on whatever columns happen to be shared).
bitcoin_features <- bitcoin_features %>%
  left_join(median_confirmation_time_bitcoin, by = "date")
This feature (`bchain.naddu`) is missing one value, on 23 May 2020:
# bchain.naddu for 2020-05-23 is patched by hand with the figure obtained from
# the blockchain website: 588174.
bitcoin_features[bitcoin_features$date == "2020-05-23", "bchain.naddu"] <- 588174

# The daily and monthly hits for 30 Nov 2020 are missing; for now both columns
# carry the previous day's value forward.
bitcoin_features <- bitcoin_features %>%
  fill(hits_daily, hits_monthly, .direction = "down")
Confirm that there are no missing values before preparing the data for the models.
# Show the final feature table as a paged table in the rendered report.
rmarkdown::paged_table(bitcoin_features)
# Per-column summary (n_missing, complete_rate, distribution statistics), used
# here to check that no column still has missing values. The call is kept in
# this direct form because skim() reports the name of the object it is given.
skim(bitcoin_features)
| Name | bitcoin_features |
| Number of rows | 1065 |
| Number of columns | 57 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 1 |
| numeric | 55 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2018-01-01 | 2020-11-30 | 2019-06-17 | 1065 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| future_return_sign | 0 | 1 | FALSE | 2 | 1: 558, 0: 507 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| close | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| future_return | 0 | 1 | 0.00 | 0.04 | -0.39 | -0.01 | 0.00 | 0.02 | 0.18 | ▁▁▁▇▁ |
| tweets_volume | 0 | 1 | 96273.55 | 47910.80 | 44292.00 | 65777.00 | 80060.00 | 110723.00 | 371290.00 | ▇▂▁▁▁ |
| bb_width | 0 | 1 | 0.24 | 0.15 | 0.03 | 0.12 | 0.21 | 0.32 | 0.88 | ▇▆▂▁▁ |
| bb_percent_b | 0 | 1 | 0.53 | 0.37 | -0.75 | 0.26 | 0.53 | 0.82 | 1.79 | ▁▅▇▅▁ |
| rsi | 0 | 1 | 51.23 | 13.74 | 9.63 | 42.19 | 50.09 | 59.61 | 88.71 | ▁▃▇▃▁ |
| macd_signal_dist | 0 | 1 | 1.23 | 118.18 | -538.72 | -57.77 | 6.94 | 61.00 | 472.25 | ▁▂▇▃▁ |
| cumulative_cmfv | 0 | 1 | 7512870167.75 | 1723437413.02 | 4846877102.15 | 6023132825.36 | 7010265118.99 | 8550339719.48 | 12723602645.28 | ▇▂▆▂▁ |
| ema_12 | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| ema_24 | 0 | 1 | 8279.18 | 2854.91 | 3195.18 | 6457.76 | 8213.66 | 9877.74 | 19132.48 | ▃▇▅▁▁ |
| ema_72 | 0 | 1 | 8276.99 | 2842.63 | 3208.65 | 6454.71 | 8233.00 | 9919.76 | 18810.45 | ▃▇▅▁▁ |
| sma_12 | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| sma_24 | 0 | 1 | 8279.01 | 2856.62 | 3186.66 | 6446.83 | 8210.40 | 9900.68 | 18946.64 | ▃▇▅▁▁ |
| sma_72 | 0 | 1 | 8276.36 | 2845.42 | 3189.69 | 6450.57 | 8212.51 | 9901.66 | 18758.88 | ▃▇▅▁▁ |
| roc_14 | 0 | 1 | 0.00 | 0.04 | -0.49 | -0.01 | 0.00 | 0.02 | 0.17 | ▁▁▁▇▁ |
| roc_28 | 0 | 1 | 0.00 | 0.06 | -0.49 | -0.02 | 0.00 | 0.03 | 0.22 | ▁▁▁▇▁ |
| roc_90 | 0 | 1 | 0.00 | 0.07 | -0.49 | -0.03 | 0.00 | 0.03 | 0.25 | ▁▁▂▇▁ |
| roc_180 | 0 | 1 | 0.00 | 0.08 | -0.51 | -0.04 | 0.00 | 0.04 | 0.26 | ▁▁▂▇▁ |
| roc_200 | 0 | 1 | 0.00 | 0.09 | -0.61 | -0.04 | 0.01 | 0.05 | 0.27 | ▁▁▂▇▁ |
| on_balance_vol | 0 | 1 | -1096409073.75 | 1101578392.07 | -2940622853.82 | -2044454043.25 | -1056244942.69 | -339411405.58 | 2581654689.52 | ▇▇▇▂▁ |
| nvt | 0 | 1 | 153.65 | 74.52 | 27.65 | 106.15 | 132.72 | 177.71 | 470.50 | ▆▇▂▁▁ |
| atr | 0 | 1 | 480.16 | 320.36 | 86.93 | 301.27 | 405.51 | 538.06 | 1888.64 | ▇▅▁▁▁ |
| hits_daily | 0 | 1 | 10.29 | 4.33 | 4.56 | 7.60 | 9.28 | 11.48 | 53.00 | ▇▁▁▁▁ |
| hits_monthly | 0 | 1 | 16.06 | 8.28 | 8.00 | 12.00 | 14.00 | 18.00 | 53.00 | ▇▂▁▁▁ |
| bchain.avbls | 0 | 1 | 1.04 | 0.20 | 0.43 | 0.92 | 1.06 | 1.21 | 1.42 | ▁▂▆▇▅ |
| bchain.blchs | 0 | 1 | 226415.94 | 47234.65 | 149113.62 | 183647.83 | 224988.07 | 266481.87 | 312990.88 | ▇▇▆▇▆ |
| bchain.cptra | 0 | 1 | 51.95 | 24.10 | 18.00 | 33.93 | 48.73 | 61.94 | 146.40 | ▇▇▂▁▁ |
| bchain.cptrv | 0 | 1 | 1.51 | 0.83 | 0.28 | 0.95 | 1.36 | 1.80 | 5.72 | ▇▆▂▁▁ |
| bchain.etrav | 0 | 1 | 141157.56 | 65479.74 | 37558.21 | 99809.22 | 133866.30 | 168755.87 | 629491.33 | ▇▃▁▁▁ |
| bchain.etrvu | 0 | 1 | 1180597105.11 | 735311420.01 | 223512708.83 | 664911253.63 | 991675387.16 | 1528500615.48 | 5164208947.92 | ▇▃▁▁▁ |
| bchain.hrate | 0 | 1 | 72231222.53 | 37344448.32 | 13727615.88 | 41672924.13 | 58125055.93 | 107494237.74 | 162263115.42 | ▇▇▅▆▂ |
| bchain.mirev | 0 | 1 | 14090625.39 | 5644452.40 | 4750978.91 | 10379765.62 | 13411241.26 | 16961771.12 | 46900388.18 | ▇▇▁▁▁ |
| bchain.mktcp | 0 | 1 | 147617221755.42 | 52610262701.00 | 56200958927.90 | 112434867969.00 | 144998538657.00 | 178943347659.00 | 357155729656.00 | ▆▇▅▁▁ |
| bchain.naddu | 0 | 1 | 534927.78 | 104397.24 | 310119.00 | 459478.00 | 526880.00 | 598670.00 | 1054711.00 | ▃▇▃▁▁ |
| bchain.ntran | 0 | 1 | 285477.61 | 59811.49 | 135129.00 | 240652.00 | 295589.00 | 330103.00 | 452646.00 | ▂▅▇▆▁ |
| bchain.ntrat | 0 | 1 | 428964559.51 | 91197055.55 | 287815664.00 | 343716805.00 | 424766007.00 | 510432934.00 | 591325128.00 | ▇▆▅▆▆ |
| bchain.ntrbl | 0 | 1 | 1948.22 | 426.53 | 834.13 | 1647.23 | 2057.52 | 2280.87 | 2762.54 | ▂▃▅▇▃ |
| bchain.ntrep | 0 | 1 | 275379.37 | 57782.80 | 129732.00 | 231918.00 | 286027.00 | 320995.00 | 437027.00 | ▂▅▇▆▁ |
| bchain.totbc | 0 | 1 | 17748971.35 | 540864.46 | 16774500.00 | 17284587.50 | 17762812.50 | 18260750.00 | 18557900.00 | ▅▅▅▅▇ |
| bchain.toutv | 0 | 1 | 1330607.32 | 1049236.99 | 421940.21 | 864866.80 | 1097641.32 | 1518114.01 | 24528670.35 | ▇▁▁▁▁ |
| bchain.trfee | 0 | 1 | 64.99 | 94.55 | 9.97 | 22.56 | 33.39 | 72.11 | 778.26 | ▇▁▁▁▁ |
| bchain.trfus | 0 | 1 | 664590.94 | 1343382.55 | 42133.15 | 146878.04 | 269893.77 | 664514.69 | 12045172.46 | ▇▁▁▁▁ |
| bchain.trvou | 0 | 1 | 317567460.47 | 338531702.16 | 25368503.34 | 113743420.03 | 203978693.83 | 382232905.93 | 3094258118.19 | ▇▁▁▁▁ |
| close_drawdown | 0 | 1 | -0.57 | 0.15 | -0.83 | -0.66 | -0.57 | -0.48 | 0.00 | ▃▇▅▁▁ |
| vix | 0 | 1 | 20.23 | 10.02 | 9.15 | 13.43 | 16.57 | 24.32 | 82.69 | ▇▂▁▁▁ |
| effr | 0 | 1 | 1.49 | 0.86 | 0.04 | 1.09 | 1.70 | 2.20 | 2.45 | ▆▁▂▇▇ |
| sp500 | 0 | 1 | 2937.29 | 263.55 | 2237.40 | 2747.33 | 2884.43 | 3110.29 | 3638.35 | ▁▆▇▃▂ |
| gold_price | 0 | 1 | 1467.09 | 231.05 | 1174.16 | 1289.31 | 1353.46 | 1613.79 | 2063.19 | ▇▃▂▂▂ |
| ofr_fsi | 0 | 1 | -1.70 | 2.31 | -4.24 | -2.89 | -2.25 | -1.43 | 10.27 | ▇▂▁▁▁ |
| aaii_bearish_sentiment | 0 | 1 | 0.34 | 0.09 | 0.16 | 0.26 | 0.32 | 0.41 | 0.53 | ▂▇▅▅▃ |
| cny_usd_close | 0 | 1 | 6.82 | 0.24 | 6.27 | 6.71 | 6.88 | 7.00 | 7.18 | ▃▁▅▇▆ |
| dollar_index_close | 0 | 1 | 95.61 | 2.82 | 88.50 | 93.80 | 96.37 | 97.46 | 103.61 | ▂▃▇▅▁ |
| crypto_index | 0 | 1 | 63.40 | 12.62 | 39.20 | 55.36 | 62.08 | 70.88 | 115.30 | ▃▇▃▁▁ |
| crypto_index_return | 0 | 1 | 0.00 | 0.02 | -0.22 | -0.01 | 0.00 | 0.01 | 0.08 | ▁▁▁▇▁ |
| bchain.atrct | 0 | 1 | 9.48 | 3.21 | 3.37 | 7.22 | 8.92 | 11.10 | 25.25 | ▆▇▂▁▁ |